/**********************************************************************/
/*
Author: Karan Makkar, adapted from Michelle's code
Created: Nov 2023
Modification Date: Aug 2025, by Youssef Assarssah
Description: Master program to run and output 2021 online survey outcomes regressions
Output: TableA25, TableA26_1, TableA26_2
*/
/**********************************************************************/


/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

* Include the filepath setup file, unless the global master_run is already "1"
* (i.e. skip the include when a calling script has set that flag)
if "$master_run" !="1" include "./Do/SET_FILEPATHS.do"

* Reset the Stata session, turn off output paging, and fix the random seed
clear all
set more off
set matsize 11000
set seed 123456789

* Log: close any log that is still open (errors ignored via cap), build a date
* prefix from today's date using the %tdCYND display format, and open a text
* log under the KP_logs folder, replacing any existing file of the same name
cap log close
global prefix: display %tdCYND td(`c(current_date)')
log using "$KP_logs/${prefix}_leebounds_outcomes_master.txt", text replace

/*----------------------------------------------------*/
* Section: Run programs and set dataset filepaths
/*----------------------------------------------------*/

* Impact regression programs
include "$KP_do/3 Functions/impact_regression_programs"

* Lee-bound regression programs
* NOTE(review): presumably this is where lee_iv_reg and lee_ols_reg (called
* further down) are defined -- confirm against the included file
include "$KP_do/3 Functions/leebound_regression_programs"

* Table formatting and flipping programs
include "$KP_do/3 Functions/impact_table_programs"

/*----------------------------------------------------*/
* Section: Outcome variable lists
/*----------------------------------------------------*/

* Main Table Vars
* Two outcome lists, one per pass of the IV regression loop (i = 1 and i = 2).
* Each list is a single global definition continued across lines; the LAST
* entry of a list must NOT carry a line continuation, otherwise whatever is
* on the following line is absorbed into the definition.
global vars_21_main_1 tot_cons_adj ///
        ladder_now ///
        any_buy ///
        any_sell ///
        net_ast_buy_w_thous ///
        loan_borrow ///
        loan_get ///
        loan_give ///
        consmooth_move

global vars_21_main_2 employed ///
        self_emp_bus_owner ///
        wage_adj ///
        income_adj ///
        job_search ///
        hh_new_bus ///
        job_satisfy_n ///
        business_ability ///
        switch_industry_1

* Training Vars (ever_win_17 is run as OLS in the training section, the
* remaining outcomes as IV)
global vars_21_training ever_win_17 ///
        take_courses ///
        course_hours

* Other Vars
global vars_21_other depress_disint_n ///
        efficacy_n ///
        job_web ///
        ewallet_buy ///
        choose_ewallet ///
        views_gov_help_n ///
        views_allocation_n
      
/*----------------------------------------------------*/
* Section: Regression Specifications
/*----------------------------------------------------*/

* SE CLUSTERING (OLS): Individual level
global vce vce(cluster anon_id4)

* INSTRUMENT (IV): win in batch
global instrument_iv win_in_batch

* SE CLUSTERING (IV): Individual level
global cluster cluster(anon_id4)

* FIXED EFFECTS: Strata and inputs into PMO-assigned weight interacted by batch
global absorb absorb(strata gender#batch urban#batch ed_cat#batch)

* FIXED EFFECTS: Inputs into PMO-assigned weight interacted by batch
* (this is the absorb set passed to the regressions in this file)
global demog_batch absorb(gender#batch urban#batch ed_cat#batch)

* FIXED EFFECTS (interacted with female): Inputs into PMO-assigned weight interacted by batch
* NOTE(review): unlike the other interacted sets below, this list contains no
* female interaction and is identical to demog_batch -- confirm this is intended
global demog_female absorb(gender#batch urban#batch ed_cat#batch)  

* FIXED EFFECTS (interacted with urban): Inputs into PMO-assigned weight interacted by batch
global demog_urban absorb(gender#batch#urban urban#batch ed_cat#batch#urban)

* FIXED EFFECTS (interacted with java): Inputs into PMO-assigned weight interacted by batch
global demog_java absorb(gender#batch#java urban#batch#java ed_cat#batch#java)  

* FIXED EFFECTS (interacted with above_hs): Inputs into PMO-assigned weight interacted by batch
* NOTE(review): ed_cat#batch is not interacted with above_hs here -- presumably
* because above_hs is derived from education; confirm
global demog_abovehs absorb(gender#batch#above_hs urban#batch#above_hs ed_cat#batch)  

* FIXED EFFECTS (interacted with young): Inputs into PMO-assigned weight interacted by batch
global demog_young absorb(gender#batch#young urban#batch#young ed_cat#batch#young)  

* FIXED EFFECTS (interacted with numapp): Inputs into PMO-assigned weight interacted by batch
global demog_numapp absorb(gender#batch#numapp_xt urban#batch#numapp_xt ed_cat#batch#numapp_xt)  

* FIXED EFFECTS: Strata only
global absorb_strata absorb(strata)

* FIXED EFFECTS: Strata win probability only
global absorb_winprob absorb(stratum_win_prob)

* SAMPLE RESTRICTIONS: batch 7 onward (batch >= 7)
global after_batch_7 batch >= 7  

* SAMPLE RESTRICTIONS: batch 18 onward (batch >= 18)
global after_batch_18 batch >= 18

* SAMPLE RESTRICTIONS: batches 7 through 11 inclusive (for comparison to 2022 survey)
global batch_7_11 batch >= 7 & batch <= 11  

* COMPLETED SURVEY RESTRICTION: starts with "&" so it can be appended directly
* after one of the sample restrictions above
global and_completed & completed == 1 & consented == 1 & verified == 1

/*----------------------------------------------------------------*/
* Section: Data Setup
/*----------------------------------------------------------------*/

* Load 2021 data
use "$KP_deid_survey/2021/Clean/071323_survey_21_deid_clean_merged.dta", clear
di _N
* Keep only those sent survey (or with a non-missing survey progress value)
keep if sent_survey==1 | !mi(progress)
di _N

* Winsorize course hours at the 99th percentile (in place)
winsor2 course_hours, replace cut(0 99)

// Generate total value of assets purchased/sold (KA02, KA06, KA10, KA14)
* Handle NA: the value is set to 0 if the individual didn't buy/sell, and it
* stays missing if they did buy/sell but the value is missing

preserve

* Calculate at the individual level
duplicates drop anon_id4 bike_* devices_* gold_* livestock_*, force
distinct anon_id4 // 154336

* Replace asset values with 0 if the indiv didn't buy/sell, and collect the
* names of the value variables into the locals buy_val_vars and sell_val_vars
local asset_types bike devices gold livestock

foreach type in `asset_types'  {
    foreach action in buy sell {
        local var "`type'_`action'"
        replace `var'_val = 0 if `var' == 0

        local `action'_val_vars ``action'_val_vars' `type'_`action'_val
    }
}

* Generate the sum variables (row total is missing when every component is missing)
foreach action in buy sell {
    egen tot_ast_`action' = rowtotal(``action'_val_vars'), mis
}

rename tot_ast_sell tot_ast_sold
* Winsorize total asset vars at 99.5th percentile (new variables with suffix _w)
winsor2 tot_ast_buy* tot_ast_sold*, cuts(0 99.5) suffix(_w)

* Generate net asset purchases, for both the raw and the winsorized totals
foreach suff in "" "_w" {
    gen net_ast_buy`suff'  = tot_ast_buy`suff'  - tot_ast_sold`suff'
    gen net_ast_sold`suff' = tot_ast_sold`suff' - tot_ast_buy`suff'
}

* Generate thousand scaled vars
local thous_vars tot_ast_buy tot_ast_sold tot_ast_buy_w tot_ast_sold_w ///
               net_ast_buy net_ast_buy_w

foreach var of local thous_vars {
    cap drop `var'_thous
    gen `var'_thous = `var'/1000
    * Accumulate the names of the scaled variables. This used to append to an
    * undefined local (vars_mil), so the list only ever held the last variable.
    local vars_thous `vars_thous' `var'_thous

    * Label new variable using the label of the unscaled variable
    local label : variable label `var'
    dis "`label'"
    label variable `var'_thous "`label' (thousands IDR)"
}

tempfile asset_data
save `asset_data'
restore

* Merge back with person-batch level data
* (m:1 requires anon_id4 to be unique in the individual-level file, so the
* merge stops with an error if the de-duplication above left repeated ids)
merge m:1 anon_id4 using `asset_data', gen(merge_asset)

* X VAR: Generate placeholder (overwritten before each regression)
gen x = 0
la var x "Treated"

*Education cat
recode education (1/2 = 1) (3 = 2) (4/8 = 3), gen(ed_cat)

cap drop age_cat
recode age (0/30 = 1) (31/50 = 2) (51/100 = 3), gen(age_cat)

* Cleaning (move to cleaning file later)
replace yrs_school = 13 if yrs_school==13.5
replace temp_no_work = . if missing(employed)

* Test score bins: 1 = below 60, 2 = 60 to below 80, 3 = 80 to 100
* (missing or out-of-range scores stay missing)
summ final_test_score
gen test_score_bins = 1 if final_test_score < 60
replace test_score_bins = 2 if 60 <= final_test_score & final_test_score < 80
replace test_score_bins = 3 if 80 <= final_test_score & final_test_score <= 100
tab test_score_bins, m

* Var for constant_reg function
* female is left missing when gender is missing; without the qualifier a
* missing gender would be coded as 0 (not female)
gen female = gender == 0 if !missing(gender)
gen constant = 1
gen missing =.

* Drop non-randomized batches
drop if inlist(batch, 1, 15)

/*----------------------------------------------------------------*/
* Section: 2021 IV Regressions
/*----------------------------------------------------------------*/

forvalues i = 1/2 {
    * Table layout: one column of estimates, named iva_21
    global columns iva_21
    global column_num 1

    * Reset the accumulators for stored models and outcome names
    global model_list
    global var_list

    * Outcomes for this pass: first or second main list
    global vars ${vars_21_main_`i'}

    * Control variables handed to the regression program
    local controls "stratum_win_prob c.stratum_win_prob#i.batch"

    * One IV regression per outcome; k is the 1-based position of the outcome
    * and is passed to the regression program as its last argument
    local k = 1
    foreach y of varlist $vars {

        * Regressor x is set to the actual-win indicator, instrumented with
        * the variable named in the instrument_iv global; the sample is
        * batch 7 onward plus the completed-survey restriction
        replace x = ever_win_17
        lee_iv_reg `y'_iva_21 `y' x "$instrument_iv" "`controls'" "$after_batch_7 $and_completed" "$demog_batch" "$cluster" `k'

        * Record the stored model name and the outcome name
        global model_list $model_list `y'_iva_21
        global var_list $var_list `y'

        local ++k
    }

    * Write the bounds strings held in the globals bounds_1, bounds_2, ... to
    * a one-row CSV with one cell per outcome
    * NOTE(review): presumably these globals are filled by lee_iv_reg -- confirm
    preserve
    di "`k'"
    keep if _n ==1
    * k overshoots by one after the loop; step back to the number of outcomes
    local --k
    di "`k'"
    di "${bounds_`k'}"
    forvalues j = 1/`k'{
        gen bounds_`j' = "${bounds_`j'}"
    }
    keep bounds_*
    export delimited using "$KP_output/tables/impacts/csv/bounds_iv`i'_2021", replace novarnames
    restore

    * Save the estimates, flip the table, and export it to tex
    save_sterfile "$KP_output/tables/impacts/sterfiles/online21_iv`i'_temp.ster"
    flip_table x "$var_list" "$model_list" "$columns" "$column_num"

    * The first pass uses export_table_lee, the second export_table_lee_noobs
    if `i' == 1 {
        export_table_lee "$columns" "$KP_output/tables/impacts/tex/online21_iv`i'_temp" "tot_cons_adj"
    }
    else {
        export_table_lee_noobs "$columns" "$KP_output/tables/impacts/tex/online21_iv`i'_temp" "employed"
    }
}

/*----------------------------------------------------------------*/
* Section: 2021 Training Regressions
/*----------------------------------------------------------------*/

* Table layout: one column of estimates, named iva_21
global columns iva_21
global column_num 1

* Reset the accumulators for stored models and outcome names
global model_list
global var_list

* Outcomes for the training table
global vars $vars_21_training

* Control variables handed to the regression programs
local controls "stratum_win_prob c.stratum_win_prob#i.batch"

* One regression per outcome; k is the 1-based position of the outcome and is
* passed to the regression program as its last argument
local k = 1
foreach y of varlist $vars {

    if "`y'" == "ever_win_17" {
        * OLS: outcome is the actual-win indicator, regressor is win in batch
        replace x = win_in_batch
        lee_ols_reg `y'_iva_21 `y' x "`controls'" "$after_batch_7 $and_completed" "$demog_batch" "$cluster" `k'
    }
    else {
        * IV: regressor is the actual-win indicator, instrumented with the
        * variable named in the instrument_iv global
        replace x = ever_win_17
        lee_iv_reg `y'_iva_21 `y' x "$instrument_iv" "`controls'" "$after_batch_7 $and_completed" "$demog_batch" "$cluster" `k'
    }

    * Record the stored model name and the outcome name
    global model_list $model_list `y'_iva_21
    global var_list $var_list `y'

    local ++k
}

* Write the bounds strings held in the globals bounds_1, bounds_2, ... to a
* one-row CSV with one cell per outcome
preserve
di "`k'"
keep if _n ==1
* k overshoots by one after the loop; step back to the number of outcomes
local --k
di "`k'"
di "${bounds_`k'}"
forvalues j = 1/`k'{
    gen bounds_`j' = "${bounds_`j'}"
}
keep bounds_*
export delimited using "$KP_output/tables/impacts/csv/bounds_training_iv_2021", replace novarnames
restore

* Save the estimates, flip the table, and export it to tex
save_sterfile "$KP_output/tables/impacts/sterfiles/online21_training_iv_temp.ster"
flip_table x "$var_list" "$model_list" "$columns" "$column_num"
export_table_lee "$columns" "$KP_output/tables/impacts/tex/online21_training_iv_temp" "ever_win_17"

    ****** Add in Lee Bounds ******
python: 
import csv

def insert_strings(tex_file_path, csv_file_path, output_file_path, insert_positions):
    """Copy a tex file to a new file with extra table rows spliced in.

    The cells of the first CSV record are turned into table rows, one per
    entry of insert_positions, taken in order.

    tex_file_path     -- tex file to read
    csv_file_path     -- CSV file; only its first record is used
    output_file_path  -- file to write (overwritten if it exists)
    insert_positions  -- 0-based line indices in the ORIGINAL tex file; each
                         index is shifted by the number of rows already
                         inserted, so the list is expected in ascending order

    An IndexError is raised if the CSV record has fewer cells than there are
    positions; cells beyond the number of positions are ignored.
    """
    # Only the first record of the CSV is read
    with open(csv_file_path, newline='') as bounds_csv:
        bound_cells = next(csv.reader(bounds_csv))

    with open(tex_file_path, 'r') as tex_in:
        tex_lines = tex_in.readlines()

    # shift counts the rows inserted so far: it selects the next cell and
    # moves the original index down past the rows already added
    shift = 0
    for base_index in insert_positions:
        new_row = "            &  " + bound_cells[shift] + "  \\\\\n"
        tex_lines.insert(base_index + shift, new_row)
        shift += 1

    with open(output_file_path, 'w') as tex_out:
        tex_out.writelines(tex_lines)

# Splice the Lee-bounds rows into each temp table and write the final tables.
# Positions are line indices in the temp tex file, spaced 3 apart starting at
# 4, with one position per outcome; cells without a position are dropped
# silently, so each list must be as long as the matching outcome list.

# Table A26, part 1: the first main outcome list has 9 outcomes, so 9
# positions are needed. The list previously stopped at 25 (8 entries), which
# left the bounds for the last outcome out of the table.
insert_positions = [4, 7, 10, 13, 16, 19, 22, 25, 28]
tex_file_path = f'$KP_output/tables/impacts/tex/online21_iv1_temp.tex'
csv_file_path = f'$KP_output/tables/impacts/csv/bounds_iv1_2021.csv'
output_file_path = f'$KP_output/tables/impacts/tex/TableA26_1.tex'

insert_strings(tex_file_path, csv_file_path, output_file_path, insert_positions)

# Table A26, part 2: 9 outcomes, 9 positions
insert_positions = [4, 7, 10, 13, 16, 19, 22, 25, 28]
tex_file_path = f'$KP_output/tables/impacts/tex/online21_iv2_temp.tex'
csv_file_path = f'$KP_output/tables/impacts/csv/bounds_iv2_2021.csv'
output_file_path = f'$KP_output/tables/impacts/tex/TableA26_2.tex'

insert_strings(tex_file_path, csv_file_path, output_file_path, insert_positions)

# Table A25 (training): 3 outcomes, 3 positions
insert_positions = [4, 7, 10]
tex_file_path = f'$KP_output/tables/impacts/tex/online21_training_iv_temp.tex'
csv_file_path = f'$KP_output/tables/impacts/csv/bounds_training_iv_2021.csv'
output_file_path = f'$KP_output/tables/impacts/tex/TableA25.tex'

insert_strings(tex_file_path, csv_file_path, output_file_path, insert_positions)
end